# Read the two CSV files into `tr` and `te`. The first row of each file is
# treated as the header and fread's progress output is turned off.
# TRUE/FALSE are spelled out because T/F are ordinary variables that can be
# reassigned, whereas TRUE/FALSE are reserved words.
tr <- fread("PDF/trPDF.csv", header = TRUE, showProgress = FALSE)
te <- fread("PDF/tePDF.csv", header = TRUE, showProgress = FALSE)

# Keep the `target` column as a standalone vector, then remove it from `tr`.
target <- tr$target
tr$target <- NULL

# Number of rows per distinct value of `target` (recorded output below).
summary(as.factor(target))
## 0 1
## 179902 20098

# Drop the `ID_code` column from both tables.
tr$ID_code <- NULL
te$ID_code <- NULL
# Structure overview of both tables via plot_str()
# (presumably from DataExplorer -- confirm which package is attached).
plot_str(tr)
plot_str(te)
# NOTE(review): bare symbol -- evaluating this line errors unless an object
# named `only_contiguous` is defined elsewhere. It looks like leftover text
# rather than intended code; confirm before running the script.
only_contiguous
# Summary metrics for `tr`; the recorded console output follows.
introduce(tr)
## rows columns discrete_columns continuous_columns all_missing_columns
## 1: 200000 200 0 200 0
## total_missing_values complete_rows total_observations memory_usage
## 1: 0 200000 40000000 320037768
# Same summary for `te`; the recorded output matches the one for `tr`.
introduce(te)
## rows columns discrete_columns continuous_columns all_missing_columns
## 1: 200000 200 0 200 0
## total_missing_values complete_rows total_observations memory_usage
## 1: 0 200000 40000000 320037768
# Correlation matrix across the columns of `tr`, restricted to complete
# observations, drawn as a lower-triangle corrplot without the diagonal.
corrplot(
  cor(tr, use = "complete.obs"),
  type = "lower",
  diag = FALSE
)

# Identical plot for `te`.
corrplot(
  cor(te, use = "complete.obs"),
  type = "lower",
  diag = FALSE
)
# No correlation (normalized data?)
# Apply sin() element-wise to every column of `tr`, then plot the
# correlation matrix of the transformed columns (complete observations
# only) as a lower-triangle corrplot without the diagonal.
corrplot(
  cor(sin(tr), use = "complete.obs"),
  type = "lower",
  diag = FALSE
)

# Identical transform and plot for `te`.
corrplot(
  cor(sin(te), use = "complete.obs"),
  type = "lower",
  diag = FALSE
)